# set working directory to location of the R file
#setwd("replication/scripts")

library(tidyverse)

# Load the prepared replication data sets. Paths are relative, so the working
# directory must be the folder containing this script (see the note at the top).
# Objects used below but never created in this script (prev_exp_agency,
# prev_exp_firm, prev_exp_firm_year, staff_exp_firm, prev_staff_exp,
# combined_panel) are expected to come from these files.
# NOTE(review): which file defines which object is not visible here -- confirm.
load("../Data/prev_exp_agency.RData")
load("../Data/staff_firms_info.RData")
load("../Data/firm_experience_panels.RData")
load("../Data/firm_panel.RData")
#load("../../Data/lost_revenue.RData")
load("../Data/combined_panel.RData")
load("../Data/staff_party.RData")
load("../Data/firm_panel_revenue_distributions.RData")

# prev_exp_firm_year %>%
#   ungroup %>%
#   group_by(report_year) %>%
#   summarize(total_bureaucrats = sum(prev_exp)) %>%
#   ggplot(aes(x = report_year, y = total_bureaucrats)) + geom_bar(stat="identity") +
#   xlab("Year") + ylab("Total") +
#   ggtitle("Lobbyists as Bureaucrats") +
#   ggsave(file = "LaTeX/images/lobbyists_as_bureaucrats.pdf", units="in", width=6, height=4)
# 
# prev_staff_exp %>%
#   ungroup %>%
#   group_by(start_date_year) %>%
#   summarize(total_staff = length(unique(core_person_id))) %>%
#   ggplot(aes(x = start_date_year, y = total_staff)) + geom_bar(stat="identity") +
#   xlab("Year") + ylab("Total") +
#   ggtitle("Lobbyists as Staff") +
#   ggsave(file = "LaTeX/images/lobbyists_as_staff.pdf", units="in", width=6, height=4)

# Zero-filled copies of the experience indicators: NA becomes 0, every other
# value is carried over unchanged.
prev_exp_firm$prev_exp2 <- ifelse(
  is.na(prev_exp_firm$prev_exp),
  0,
  prev_exp_firm$prev_exp
)
staff_exp_firm$staff_exp2 <- ifelse(
  is.na(staff_exp_firm$staff_exp),
  0,
  staff_exp_firm$staff_exp
)

# Lookup from period index (1-42) to calendar year (2000-2020); each year
# spans two consecutive periods.
year_periods <- data.frame(
  first_treat = 1:42,
  report_year = rep(2000:2020, each = 2)
)

# First period in which each firm has prev_exp2 == 1, attached back onto the
# full firm panel. Firms that never have prev_exp2 == 1 get first_treat = NA
# through the right join.
n_treat <- prev_exp_firm %>%
  filter(prev_exp2 == 1) %>%
  group_by(reg_lobby_core_id) %>%
  summarise(first_treat = min(period, na.rm = TRUE)) %>%
  right_join(prev_exp_firm, by = "reg_lobby_core_id")

# Number of firms whose first such period falls in each period, mapped to the
# calendar year. One row per firm is kept before counting; firms with no
# first_treat are dropped.
n_bureau <- n_treat %>%
  distinct(reg_lobby_core_id, .keep_all = TRUE) %>%
  group_by(first_treat) %>%
  summarise(n_lobby = n()) %>%
  drop_na(first_treat) %>%
  # Join key spelled out so it cannot silently change if another shared
  # column ever appears in either table.
  left_join(year_periods, by = "first_treat") %>%
  rename(value = n_lobby) %>%
  mutate(name = "New Bureaucrats")

# First period in which each firm has staff_exp2 == 1, attached back onto the
# full staff panel. Firms that never have staff_exp2 == 1 get first_treat = NA
# through the right join.
n_treat_staff <- staff_exp_firm %>%
  filter(staff_exp2 == 1) %>%
  group_by(reg_lobby_core_id) %>%
  summarise(first_treat = min(period, na.rm = TRUE)) %>%
  right_join(staff_exp_firm, by = "reg_lobby_core_id")

# Number of firms whose first such period falls in each period, mapped to the
# calendar year. One row per firm is kept before counting; firms with no
# first_treat are dropped.
n_staff <- n_treat_staff %>%
  distinct(reg_lobby_core_id, .keep_all = TRUE) %>%
  group_by(first_treat) %>%
  summarise(n_lobby = n()) %>%
  drop_na(first_treat) %>%
  # Join key spelled out so it cannot silently change if another shared
  # column ever appears in either table.
  left_join(year_periods, by = "first_treat") %>%
  rename(value = n_lobby) %>%
  mutate(name = "New Staff")


# Stack both series; this feeds the bars of Figure 2.
n_s <- bind_rows(n_bureau, n_staff)


# Figure 2: yearly totals (lines) and first-time counts from n_s (bars).
# Lines: sum of prev_exp per report_year, and the number of distinct
# core_person_id per treated year from prev_staff_exp. Only years before 2020
# are shown.
prev_exp_firm_year %>%
  ungroup() %>%
  group_by(report_year) %>%
  summarize(total_bureaucrats = sum(prev_exp)) %>%
  left_join(
    prev_staff_exp %>%
      ungroup() %>%
      unnest(cols = c("treated_years")) %>%
      group_by(treated_years) %>%
      rename(start_date_year = treated_years) %>%
      summarize(total_staff = length(unique(core_person_id))),
    by = c("report_year" = "start_date_year")
  ) %>%
  filter(report_year < 2020) %>%
  pivot_longer(-report_year) %>%
  # Legend labels for the two line series (fixes the "Lobobyist-Staff" typo).
  mutate(
    name = ifelse(
      name == "total_bureaucrats",
      "Lobbyist-Bureaucrats",
      "Lobbyist-Staff"
    )
  ) %>%
  ggplot(aes(x = report_year, y = value, linetype = name, group = name)) +
  # Bars use their own data and aesthetics, so the line aesthetics are not
  # inherited.
  geom_col(
    data = n_s %>% filter(report_year < 2020),
    aes(x = report_year, y = value, fill = name),
    inherit.aes = FALSE,
    position = position_dodge(), width = .7
  ) +
  geom_line() +
  #geom_line(data = n_staff, aes(x = report_year, y = value), inherit.aes=F, linetype = 2) +
  scale_fill_grey() +
  theme_classic() +
  theme(legend.title = element_blank()) +
  ylab("Number of Lobbyists") + xlab("")

# No plot is passed, so ggsave() writes the most recently created ggplot.
ggsave(filename = "../images/Figure2.pdf", units = "in", width = 10, height = 6)
    



library(panelView)

# Fixed seed so the random draw of firms below is reproducible.
set.seed(919)

# Panel for Figure 1: rows from firms with at least 20 rows in combined_panel,
# restricted to periods 3-40, then limited to firms that fall in a random draw
# of 1400 firm ids taken from the full panel. `treated` is 1 whenever either
# experience column is non-missing.
pv <- combined_panel %>%
  ungroup() %>%
  group_by(reg_lobby_core_id) %>%
  filter(n() >= 20, period <= 40, period > 2) %>%
  ungroup() %>%
  filter(
    reg_lobby_core_id %in%
      sample(unique(combined_panel$reg_lobby_core_id), 1400)
  ) %>%
  mutate(treated = as.numeric(!is.na(prev_exp) | !is.na(staff_exp)))


# Treatment-status plot by firm and period; tick labels are removed from both
# axes.
panelView(
  revenue ~ treated,
  data = pv,
  index = c("reg_lobby_core_id", "period"),
  legend.labs = c("Missing", "Untreated", "Treated"),
  gridOff = TRUE
) +
  xlab("Lobbying Period") +
  ylab("Firm") +
  theme(
    axis.text.y = element_blank(),
    axis.text.x = element_blank(),
    panel.background = element_rect(fill = "white"),
    plot.background = element_rect(fill = "white"),
    legend.background = element_rect(fill = "#eeeeee")
  )

ggsave(filename = "../images/Figure1.pdf", units = "in", height = 10, width = 8)




# party descriptives ------------------------------------------------------
# !! Note for replication:
  # Table 1 comes from numbers manually calculated in this section

# firm longevity
# treat = 1 when either experience flag equals 1; 0 otherwise, including rows
# where the comparison comes out as NA.
combined_panel$treat <- ifelse(
  combined_panel$prev_exp == 1 | combined_panel$staff_exp == 1, 1, 0
)
combined_panel$treat[is.na(combined_panel$treat)] <- 0


# Longevity per firm: number of panel rows divided by two.
long <- combined_panel %>%
  group_by(reg_lobby_core_id) %>%
  summarise(long = n() / 2)

mean(long$long)

# Same longevity measure, carrying a firm-level flag that is 1 when the firm
# has treat > 0 in at least one row.
long_treat <- combined_panel %>%
  group_by(reg_lobby_core_id) %>%
  mutate(any_treat = ifelse(mean(treat, na.rm = TRUE) > 0, 1, 0)) %>%
  group_by(reg_lobby_core_id, any_treat) %>%
  summarise(long = n() / 2)

# Average longevity by the firm-level flag.
long_treat %>%
  group_by(any_treat) %>%
  summarise(mean(long))
  
  
load("../Data/staff_desc.RData")

# Firms with at least one Democrat row and at least one Republican row in
# staff_desc. total_lobs is the row total across the firms that pass the
# filter. Rows with a missing party count toward `total` only.
staff_desc %>%
  group_by(reg_lobby_core_id) %>%
  summarize(
    total = n(),
    dems = sum(party == "Democrat", na.rm = TRUE),
    reps = sum(party == "Republican", na.rm = TRUE)
  ) %>%
  filter(dems > 0, reps > 0) %>%
  ungroup() %>%
  mutate(total_lobs = sum(total))

# 32 firms out of 340 have lobbyists go to both parties, pretty split
# these 32 firms account for 143 of all lobbyists


# Figure A2: rows of staff_desc per year from 2002 on, overall and by party.
# Rows with a missing party count toward Total only.
staff_desc %>%
  filter(report_year.y >= 2002) %>%
  group_by(report_year.y) %>%
  summarize(
    Total = n(),
    `Dem.` = sum(party == "Democrat", na.rm = TRUE),
    `Rep.` = sum(party == "Republican", na.rm = TRUE)
  ) %>%
  pivot_longer(-report_year.y, names_to = "Party") %>%
  ggplot(
    aes(
      x = report_year.y,
      y = value,
      group = Party,
      color = Party,
      linetype = Party
    )
  ) +
  geom_line() +
  scale_color_manual(values = c("dodgerblue", "firebrick", "forestgreen")) +
  theme_classic() +
  xlab("Year") +
  ylab("Lobbyists as Staff")


ggsave(filename = "../images/FigureA2.pdf", units = "in", width = 8, height = 6)


# Figure A1: rows of staff_desc per year from 2002 on, by office type.
# Senate/House count rows whose office_type is "Senate"/"House" and whose
# party is not missing; Cmte is whatever remains of Total.
# Fix: the two labels were previously swapped (Senate counted "House" rows and
# vice versa), which disagreed with the `firms` summary later in this script.
# NOTE(review): if office_type can be NA for a row with a known party, that row
# is counted in both Senate and House by this length(x[cond]) form -- confirm
# office_type has no missing values.
staff_desc %>%
  filter(report_year.y >= 2002) %>%
  group_by(report_year.y) %>%
  summarize(
    Total = length(core_person_id),
    Senate = length(core_person_id[office_type == "Senate" & !is.na(party)]),
    House = length(core_person_id[office_type == "House" & !is.na(party)])
  ) %>%
  mutate(Cmte = Total - (Senate + House)) %>%
  pivot_longer(-report_year.y, names_to = "Office Type") %>%
  ggplot(
    aes(
      x = report_year.y,
      y = value,
      group = `Office Type`,
      color = `Office Type`,
      linetype = `Office Type`
    )
  ) +
  geom_line() +
  theme_classic() +
  xlab("Year") +
  ylab("Lobbyists as Staff")

ggsave(filename = "../images/FigureA1.pdf", units = "in", width = 8, height = 6)

# Per-firm counts by office type, kept only for firms with at least one Senate
# row and at least one House row. The total_* columns are sums over all firms
# because they are computed before the final filter.
# Fix: senate/house were previously swapped (senate counted "House" rows and
# vice versa), which disagreed with the `firms` summary later in this script.
staff_desc %>%
  group_by(reg_lobby_core_id) %>%
  summarize(
    total = length(core_person_id),
    senate = length(core_person_id[office_type == "Senate" & !is.na(party)]),
    house = length(core_person_id[office_type == "House" & !is.na(party)])
  ) %>%
  mutate(cmte = total - (senate + house)) %>%
  #filter(dems > 0 & reps > 0) %>%
  ungroup() %>%
  mutate(
    total_lobs = sum(total),
    total_senate = sum(senate),
    total_house = sum(house)
  ) %>%
  filter(senate > 0 & house > 0) %>%
  {
    # View() needs an interactive session; print instead in batch runs so the
    # script does not stop here.
    if (interactive()) View(.) else print(.)
  }

#287/610 senate, 89/610 house
# NOTE(review): the counts above may have been recorded while the senate/house
# labels were swapped -- re-check them against the corrected output.
# 30 firms have staff go to senate and house personal offices

# Destinations by agency: number of prev_exp_agency rows per organization,
# largest first.
prev_exp_agency %>%
  group_by(organization_name) %>%
  summarize(n = n()) %>%
  arrange(desc(n)) %>%
  {
    # View() needs an interactive session; print instead in batch runs so the
    # script does not stop here.
    if (interactive()) View(.) else print(.)
  }
# White House: 50
# Dept. of Education: 16
# Labor: 11
# USTR: 10
# US DHHS: 9
# Interior: 9

# One row per firm: staff_desc row counts overall and by office type (House and
# Senate counts require a non-missing party; cmte is the remainder), plus the
# grand totals across all firms repeated on every row.
firms <- staff_desc %>%
  group_by(reg_lobby_core_id) %>%
  summarize(
    total = n(),
    house = length(core_person_id[office_type == "House" & !is.na(party)]),
    senate = length(core_person_id[office_type == "Senate" & !is.na(party)])
  ) %>%
  mutate(cmte = total - (senate + house)) %>%
  #filter(dems > 0 & reps > 0) %>%
  ungroup() %>%
  mutate(
    total_lobs = sum(total),
    total_senate = sum(senate),
    total_house = sum(house)
  )

# Distinct firms with a non-missing staff_exp or prev_exp in either panel.
bind_rows(staff_exp_firm, prev_exp_firm) %>%
  filter(!(is.na(staff_exp) & is.na(prev_exp))) %>%
  distinct(reg_lobby_core_id)
#412 firms

# 113 with agency destinations
n_distinct(prev_exp_agency$reg_lobby_core_id)


# R&R stuff ---------------------------------------------------------------


## What types of congressional offices (or committees) do they move to?
load("../Data/staff_bg.RData")

library(kableExtra)

# Table A1: the 15 most frequent entries of staff_bg.cmte whose name contains
# "Committee", with their row counts, written out as a LaTeX table.
staff_bg.cmte %>%
  ungroup() %>%
  filter(str_detect(name, "Committee")) %>%
  group_by(name) %>%
  summarize(total = n()) %>%
  arrange(desc(total)) %>%
  slice_head(n = 15) %>%
  kbl(
    format = "latex",
    caption = "Committees with 5 or more firm lobbyists",
    label = "cmte_desc",
    col.names = c("Committee", "Total Lobbyists"),
    booktabs = TRUE
  ) %>%
  save_kable(file = "../tables/TableA1.tex")


# Table A2: column means of staff_bg.mem (missing values ignored) and row
# counts, one row per report_year followed by a pooled "Total" row. The means
# are rounded to two decimals and the result is written as a LaTeX table.
staff_bg.mem %>%
  ungroup() %>%
  group_by(report_year) %>%
  summarize(
    female = mean(female, na.rm = TRUE),
    votepct = mean(votepct, na.rm = TRUE),
    dwnom_abs = mean(abs(dwnom1), na.rm = TRUE),
    power = mean(power, na.rm = TRUE),
    chair = mean(chair, na.rm = TRUE),
    seniority = mean(seniority, na.rm = TRUE),
    dem.y = mean(dem.y, na.rm = TRUE),
    total = n()
  ) %>%
  # report_year becomes character so it can share a column with "Total".
  mutate(report_year = as.character(report_year)) %>%
  bind_rows(
    staff_bg.mem %>%
      ungroup() %>%
      summarize(
        report_year = "Total",
        female = mean(female, na.rm = TRUE),
        votepct = mean(votepct, na.rm = TRUE),
        dwnom_abs = mean(abs(dwnom1), na.rm = TRUE),
        power = mean(power, na.rm = TRUE),
        chair = mean(chair, na.rm = TRUE),
        seniority = mean(seniority, na.rm = TRUE),
        dem.y = mean(dem.y, na.rm = TRUE),
        total = n()
      )
  ) %>%
  mutate(across(female:dem.y, ~ round(.x, 2))) %>%
  kbl(
    format = "latex",
    caption = "Descriptives of personal offices lobbyists join",
    label = "personal_desc",
    col.names = c(
      "Congress", "Female", "Vote Pct.", "Ideol. Extremity (Abs DWN)",
      "Power Cmte. Member", "Cmte. Chair", "Seniority", "Prop. Democrat",
      "Total Lobbyists"
    ),
    booktabs = TRUE
  ) %>%
  save_kable(file = "../tables/TableA2.tex")

